Run UMAP on CD4+ events which express a COMPASS subset. Repeat for CD8 events.
The question being asked is “What are the memory and activation profiles of Ag-specific T cells?”
This time around, don’t sample the events.
Don’t stratify by groups, but rather color the sub-localization of the different markers.
Color by Cohort and Antigen (S1, S2, NCAP, VEMP, including DMSO)
Also color by
- Degree of functionality
- Cytokine
- CD45RA
- CCR7
- HLA-DR
- CD38
Boxplots?
library(openCyto)
library(CytoML)
library(flowCore)
library(flowWorkspace)
library(here)
library(tidyverse)
library(uwot)
library(ggplot2)
library(scales)
library(patchwork)
library(hues)
library(RColorBrewer)
library(ggrepel)
library(ggpubr)
library(tidyselect)
library(ggrastr)
# The helper script supplies distributeEvents() and sampleGatingHierarchy()
source(here::here("scripts/20200604_Helper_Functions.R"))

# Run settings. `date` is reused below both as the RNG seed and as the prefix
# of the cached UMAP file name; the two switches control whether UMAP is
# recomputed and whether a fresh result is written to disk.
date <- 20200815
save_output <- FALSE
rerun_dimred <- FALSE

gs <- load_gs(here::here("out/GatingSets/20200815_HAARVI_ICS_GatingSet_AllBatches_with_COMPASS_Subsets_R4.0.3"))

# Drop four sample IDs entirely, plus the Spike 2 stimulation of sample 551432
gs2 <- subset(gs, !(`SAMPLE ID` %in% c("37C", "BWT23", "116C", "BWT22") |
                      (`SAMPLE ID` == "551432" & STIM == "Spike 2")))

# Print all gate paths as a copy-pasteable character vector
dput(gh_get_pop_paths(gs2))
## c("root", "/Time", "/Time/LD-3+", "/Time/LD-3+/1419-3+", "/Time/LD-3+/1419-3+/S",
## "/Time/LD-3+/1419-3+/S/Lymph", "/Time/LD-3+/1419-3+/S/Lymph/4+",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/CD38+",
## "/Time/LD-3+/1419-3+/S/Lymph/HLADR+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/107a", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/154",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CCR7+", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/CD45RA+",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IFNG", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/NOT4+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/107a",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/154", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL2",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/IL17", "/Time/LD-3+/1419-3+/S/Lymph/8+/IL4513",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/TNF", "/Time/LD-3+/1419-3+/S/Lymph/8+/CCR7+",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD45RA+", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_NOT_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_NOT_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_107a_AND_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_NOT_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_NOT_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_NOT_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_107a_AND_NOT_154_AND_IFNG_AND_NOT_IL17_AND_NOT_IL2_AND_NOT_IL4513_AND_TNF",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/Naive",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TCM", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TEMRA",
## "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/TEM", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/Naive",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TCM", "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TEMRA",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/TEM", "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets/HLADR+CD38+",
## "/Time/LD-3+/1419-3+/S/Lymph/8+/CD8_COMPASS_Subsets/HLADR+CD38+"
## )
# The seven functional-marker gates under CD4+; also used further down to
# compute the per-event degree of functionality.
cd4_cytokine_gates <- c(
  "/Time/LD-3+/1419-3+/S/Lymph/4+/107a",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/154",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IFNG",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IL2",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IL17",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/IL4513",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/TNF"
)

# Gates whose membership is extracted for every event and later used to colour
# the UMAP: the functional gates above plus CCR7/CD45RA (under 4+) and
# CD38/HLADR (gated directly under Lymph).
cd4_gates_for_dimred <- c(
  cd4_cytokine_gates,
  "/Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+",
  "/Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+",
  "/Time/LD-3+/1419-3+/S/Lymph/CD38+",
  "/Time/LD-3+/1419-3+/S/Lymph/HLADR+"
)

# Parent gate: events from this population are the input to the UMAP
cd4_compass_subsets_parentGate <- "/Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets"
# Per-sample event count in the CD4 COMPASS-subset parent gate, attached to the
# sample metadata. Samples with a missing or healthy-control cohort, and SEB
# stimulations, are excluded.
pop_counts <- pData(gs2) %>%
  left_join(
    gs_pop_get_count_fast(gs2, subpopulations = cd4_compass_subsets_parentGate),
    by = c("rowname" = "name")
  ) %>%
  dplyr::select(rowname, Batch, "SAMPLE ID", STIM, Cohort,
                CD4_COMPASS_Subsets = Count) %>%
  dplyr::filter(
    !(Cohort %in% c(NA, "Healthy control", "Healthy control 2017-2018")),
    STIM != "SEB"
  )
Keep in mind that there is lopsided patient and group representation simply due to not sampling:
# Relabel the two convalescent cohorts with short, two-line axis labels
cd4_compass_subsets_sampleSizes_4plot <- pop_counts %>%
  mutate(
    Cohort = factor(
      Cohort,
      levels = c("Non-hospitalized", "Hospitalized"),
      labels = c("Conv\nNon-Hosp", "Conv\nHosp")
    )
  )

# One point per sample: how many events each sample contributes to the UMAP,
# split by batch (rows) and stimulation (columns).
ggplot(
  data = cd4_compass_subsets_sampleSizes_4plot,
  mapping = aes(x = factor(Cohort), y = CD4_COMPASS_Subsets)
) +
  # Outliers are hidden on the boxplot because the jitter layer draws all points
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.15, height = 0) +
  facet_grid(Batch ~ STIM) +
  labs(
    title = "ICS CD4 UMAP patient representation",
    y = "CD4+ COMPASS Subset+ Events\n for Dimensionality Reduction\n(not sampled)"
  ) +
  theme_bw(base_size = 20) +
  theme(axis.title.x = element_blank())
# Extract the events for dimensionality reduction. sampleGatingHierarchy() is
# the project helper sourced above; it is called with n = NULL, which per the
# notes in this report means the events are not actually down-sampled.
call_sampleGatingHierarchy_for_cd4 <- function(currentSampleName) {
  # print(sprintf("Sampling data from %s", currentSampleName))
  currentGatingHierarchy <- gs2[[currentSampleName]]
  sampleGatingHierarchy(currentGatingHierarchy,
                        cd4_compass_subsets_parentGate,
                        n = NULL,
                        otherGates = cd4_gates_for_dimred)
}

# One data frame per retained sample, stacked row-wise
cd4_compass_subsets_data <- dplyr::bind_rows(
  lapply(pop_counts$rowname, call_sampleGatingHierarchy_for_cd4)
)

dim(cd4_compass_subsets_data)
## [1] 123642 54
knitr::kable(head(cd4_compass_subsets_data))
| Days symptom onset to visit 1 | Sex | WELL ID | Pair ID | Hispanic? | STIM | name | Cell count | Race | Cohort | Age | Race_v2 | rowname | $DATE | Batch | Sample ID | SAMPLE ID | PLATE NAME | Collection date | EXPERIMENT NAME | filename | /Time/LD-3+/1419-3+/S/Lymph/4+/CD4_COMPASS_Subsets | /Time/LD-3+/1419-3+/S/Lymph/4+/107a | /Time/LD-3+/1419-3+/S/Lymph/4+/154 | /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG | /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 | /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 | /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 | /Time/LD-3+/1419-3+/S/Lymph/4+/TNF | /Time/LD-3+/1419-3+/S/Lymph/4+/CCR7+ | /Time/LD-3+/1419-3+/S/Lymph/4+/CD45RA+ | /Time/LD-3+/1419-3+/S/Lymph/CD38+ | /Time/LD-3+/1419-3+/S/Lymph/HLADR+ | Time | FSC-A | FSC-H | SSC-A | SSC-H | CD8b | TNFa | CD107a | CD154 | CD3 ECD | IL2 | CD4 | IL17a | IL4/5/13 | CD14/CD19 | CCR7 | CD38 | L/D | IFNg | CD45RA | HLADR |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 61 | M | H12 | 11 | N | NCAP | 112590.fcs | 1.6x10^7 | White | Non-hospitalized | 33 | White | 112590.fcs_366900 | 28-MAY-2020 | 1 | 90C.1.A | 90C | P2 | 2020-05-01 | 20200528_COVID_ICS-B1 | 90C_H12_H12_096.fcs | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 15.439 | 122568.24 | 100216 | 31674.96 | 30776 | 1027.0212 | 758.3488 | 158.9819 | 383.4604 | 3007.214 | 993.8220 | 1901.014 | 988.5380 | 1611.0039 | 663.5084 | 1872.624 | 1194.567 | 1324.692 | 432.7889 | 1398.416 | 967.1333 |
| 61 | M | H12 | 11 | N | NCAP | 112590.fcs | 1.6x10^7 | White | Non-hospitalized | 33 | White | 112590.fcs_366900 | 28-MAY-2020 | 1 | 90C.1.A | 90C | P2 | 2020-05-01 | 20200528_COVID_ICS-B1 | 90C_H12_H12_096.fcs | 1 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 15.485 | 142859.48 | 123614 | 32029.05 | 29130 | 286.6093 | 2734.2783 | 1114.5400 | 2350.0759 | 2810.713 | 3142.3025 | 1715.415 | 1170.4102 | 833.1918 | 492.7284 | 1535.650 | 1198.652 | 1217.047 | 1484.1797 | 1539.542 | 173.3101 |
| 61 | M | H12 | 11 | N | NCAP | 112590.fcs | 1.6x10^7 | White | Non-hospitalized | 33 | White | 112590.fcs_366900 | 28-MAY-2020 | 1 | 90C.1.A | 90C | P2 | 2020-05-01 | 20200528_COVID_ICS-B1 | 90C_H12_H12_096.fcs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 15.562 | 120323.20 | 105313 | 36177.21 | 35201 | 645.6627 | 1941.5667 | 786.9394 | 1029.9525 | 2903.400 | 1293.5875 | 1862.381 | 855.8306 | 635.2674 | 954.0318 | 1328.853 | 1188.295 | 1422.533 | 939.9696 | 1395.431 | 699.8994 |
| 61 | M | H12 | 11 | N | NCAP | 112590.fcs | 1.6x10^7 | White | Non-hospitalized | 33 | White | 112590.fcs_366900 | 28-MAY-2020 | 1 | 90C.1.A | 90C | P2 | 2020-05-01 | 20200528_COVID_ICS-B1 | 90C_H12_H12_096.fcs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 16.042 | 65290.08 | 48154 | 27090.93 | 25210 | 349.0576 | 1358.4554 | 504.7423 | 1294.9139 | 2929.793 | 734.6826 | 2022.920 | 1008.4814 | 305.6316 | 765.4292 | 1983.099 | 1334.761 | 1229.228 | 962.7062 | 1490.717 | 853.8486 |
| 61 | M | H12 | 11 | N | NCAP | 112590.fcs | 1.6x10^7 | White | Non-hospitalized | 33 | White | 112590.fcs_366900 | 28-MAY-2020 | 1 | 90C.1.A | 90C | P2 | 2020-05-01 | 20200528_COVID_ICS-B1 | 90C_H12_H12_096.fcs | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 16.486 | 66916.48 | 55401 | 17046.78 | 16637 | 664.9983 | 1958.0522 | 787.3279 | 1061.2051 | 2986.041 | 1464.6097 | 1927.694 | 1154.1066 | 385.6267 | 740.7294 | 1373.256 | 1464.787 | 1084.778 | 994.0617 | 1497.823 | 946.3115 |
| 61 | M | H12 | 11 | N | NCAP | 112590.fcs | 1.6x10^7 | White | Non-hospitalized | 33 | White | 112590.fcs_366900 | 28-MAY-2020 | 1 | 90C.1.A | 90C | P2 | 2020-05-01 | 20200528_COVID_ICS-B1 | 90C_H12_H12_096.fcs | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 1 | 0 | 0 | 16.574 | 76877.80 | 62924 | 16660.50 | 16012 | 163.2860 | 760.8881 | 518.0920 | 1345.3741 | 3152.756 | 393.7231 | 1930.848 | 1694.5745 | 2547.2505 | 812.1132 | 2584.446 | 1393.654 | 1234.604 | 537.8035 | 2242.353 | 229.1811 |
Is there maybe one cytokine that is dominating the entire sample?
If one cytokine has very high background expression (and a generous gate), it could be gated positive in a lot of events.
The high number of events expressing this cytokine could lead to it dominating the data, so that most sampled events are positive for this noisy cytokine. It would drown out real signal from other cytokines.
# Count, per batch, how many extracted events fall in each cytokine gate.
# Result: one row per gate, one column per batch ("B<batch>").
#
# The table is reshaped with pivot_longer()/pivot_wider() rather than t():
# transposing a frame that still contains Batch turns every count into a
# character string whenever Batch is not numeric, and the batch column names
# had to be hard-coded for exactly three batches. Here the names are derived
# from the Batch values that are actually present.
cytokine_dominance <- cd4_compass_subsets_data %>%
  group_by(Batch) %>%
  summarise_at(cd4_cytokine_gates, sum) %>%
  ungroup() %>%
  pivot_longer(cols = all_of(cd4_cytokine_gates),
               names_to = "Cytokine_Gate",
               values_to = "Events_in_Gate") %>%
  mutate(Batch = paste0("B", Batch)) %>%
  pivot_wider(names_from = Batch, values_from = Events_in_Gate) %>%
  # Keep a plain data.frame, as before
  as.data.frame()
knitr::kable(cytokine_dominance)
| Cytokine_Gate | B1 | B2 | B3 |
|---|---|---|---|
| /Time/LD-3+/1419-3+/S/Lymph/4+/107a | 10793 | 16208 | 13801 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/154 | 10689 | 12468 | 12604 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IFNG | 7405 | 4551 | 5493 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL2 | 10368 | 8386 | 9887 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL17 | 618 | 781 | 1392 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/IL4513 | 6537 | 9705 | 11946 |
| /Time/LD-3+/1419-3+/S/Lymph/4+/TNF | 11258 | 11151 | 14956 |
# Bar chart of events per cytokine gate, one panel per batch. The cytokine
# label is whatever follows "4+/" in the gate path.
cytokine_dominance %>%
  pivot_longer(cols = starts_with("B"),
               names_to = "Batch",
               values_to = "Events_in_Gate") %>%
  mutate(Cytokine = sub(".*4\\+\\/(.*)", "\\1", Cytokine_Gate)) %>%
  ggplot(aes(x = Cytokine, y = Events_in_Gate, fill = Cytokine)) +
  geom_col() +
  facet_grid(cols = vars(Batch)) +
  labs(title = "CD4 Run Cytokine Dominance by Batch") +
  theme_bw(base_size=18)
# Fluorescence channels fed to UMAP: CD3, co-receptors, cytokines, memory and
# activation markers.
cols_4_dimred <- c("CD3 ECD", "CD8b", "CD4",
                   "TNFa", "CD107a",
                   "CD154", "IL2", "IL17a",
                   "IL4/5/13", "IFNg",
                   "CCR7", "CD45RA",
                   "CD38", "HLADR")

# Centre and scale every channel within each batch; results go into
# "<channel>.scaled" columns.
#
# A grouped mutate() is used because it keeps rows in their original order.
# Going through nest()/unnest() instead returns the rows grouped batch by
# batch, and because the scaled columns are attached positionally with cbind()
# below, scaled values would land on the wrong events whenever the samples are
# not already contiguous by batch.
# NOTE(review): if the extracted events were not in batch order, a cached UMAP
# produced from the previous scaling should be regenerated.
cd4.scaled_dimred_input <- cd4_compass_subsets_data %>%
  dplyr::select(Batch, all_of(cols_4_dimred)) %>%
  group_by(Batch) %>%
  mutate(across(all_of(cols_4_dimred), ~ as.numeric(scale(.x)))) %>%
  ungroup() %>%
  dplyr::select(-Batch) %>%
  rename_with(~ paste0(.x, ".scaled"))

# Positional bind: both tables must describe the same events in the same order
stopifnot(nrow(cd4.scaled_dimred_input) == nrow(cd4_compass_subsets_data))
cd4_compass_subsets_data <- cbind(cd4_compass_subsets_data, cd4.scaled_dimred_input)
# UMAP is slow, so it is only recomputed when rerun_dimred is TRUE; otherwise
# the result of an earlier run is read back from disk.
if (!rerun_dimred) {
  # This branch relies on a previous run having saved its output
  print("Loading saved UMAP run")
  cd4_compass_subsets_w_umap <- readRDS(here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
} else {
  print("Running UMAP")
  set.seed(date)
  print(Sys.time())
  # Embed the batch-scaled CD3, co-receptor, cytokine, memory, and activation
  # marker columns
  cd4_compass_subsets_dimred_out <- uwot::umap(
    dplyr::select(cd4_compass_subsets_data,
                  all_of(paste0(cols_4_dimred, ".scaled"))),
    spread = 9, min_dist = 0.02, n_threads = 7
  )
  print(Sys.time())
  # Put the two embedding coordinates in front of the per-event data
  cd4_compass_subsets_w_umap <- cd4_compass_subsets_dimred_out %>%
    as.data.frame() %>%
    dplyr::rename(x.umap = V1, y.umap = V2) %>%
    cbind(cd4_compass_subsets_data)
  if (save_output) {
    saveRDS(cd4_compass_subsets_w_umap, here::here(sprintf("out/UMAP/%s_ICS_CD4_COMPASS_Subsets_UMAP_Unsampled.rds", date)))
  }
}
## [1] "Loading saved UMAP run"
Shuffle data frame rows so e.g. Batch 3 doesn’t dominate foreground
# Seeded random permutation of the rows, so the drawing order of the points is
# reproducible and not tied to the order in which samples were extracted
set.seed(date)
cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap[
  sample.int(nrow(cd4_compass_subsets_w_umap)),
]
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
# Type1Font() takes the metric files in the order regular, bold, italic,
# bold-italic; each path is resolved against the project root.
Arial <- Type1Font(
  family = "Arial",
  metrics = unlist(lapply(
    c("data/Arial_afm/ArialMT.afm",
      "data/Arial_afm/ArialMT-Bold.afm",
      "data/Arial_afm/ArialMT-Italic.afm",
      "data/Arial_afm/ArialMT-BoldItalic.afm"),
    here::here
  ))
)
# Register the font with the pdf device under the name "Arial"
pdfFonts(Arial = Arial)
# Colours for gate membership: light grey for out-of-gate, dark blue for in-gate
boolColorScheme <- setNames(c("#E2E2E2", "#023FA5"), c("FALSE", "TRUE"))

# Facet strip labels, keyed by the value stored in the data
stim_labs <- c("DMSO" = "DMSO", "Spike 1" = "S1", "Spike 2" = "S2",
               "NCAP" = "NCAP", "VEMP" = "VEMP")
cohort_labs <- c("Non-hospitalized" = "Conv\nNon-Hosp",
                 "Hospitalized" = "Conv\nHosp")

cd4_compass_subsets_w_umap <- cd4_compass_subsets_w_umap %>%
  mutate(
    # Degree of functionality: number of cytokine gates the event falls in
    cytokine_degree = rowSums(dplyr::select(., all_of(cd4_cytokine_gates))),
    # Fix the facet order (rows: cohort, columns: stimulation)
    Cohort = factor(Cohort, levels = names(cohort_labs)),
    STIM = factor(STIM, levels = names(stim_labs))
  )
# Draw the CD4 UMAP embedding faceted by Cohort (rows) and STIM (columns),
# coloured by one column of cd4_compass_subsets_w_umap.
#
# Arguments:
#   currentColumn  Name of the column to colour by. "Batch" and "SAMPLE ID" are
#                  treated as categorical and get a bottom legend with one
#                  generated colour per distinct value. Any other column is
#                  coerced to logical (e.g. a 0/1 gate-membership flag) and
#                  drawn without a legend.
#   pointSize      Point size passed to the rasterised point layer.
#   colorScheme    Optional colour values for the logical case (e.g. named by
#                  "FALSE"/"TRUE"). NA (the default) keeps ggplot2's default
#                  scale. Ignored for the categorical columns, whose palette is
#                  always generated.
#
# Returns the ggplot object.
base_dimred_plot <- function(currentColumn, pointSize = 0.02, colorScheme = NA) {
  isCategorical <- currentColumn %in% c("Batch", "SAMPLE ID")

  # Choose the colour mapping up front; .data[[ ]] looks the column up by its
  # string name, which also handles names containing spaces or slashes.
  pointMapping <- if (isCategorical) {
    aes(x = x.umap, y = y.umap, colour = factor(.data[[currentColumn]]))
  } else {
    aes(x = x.umap, y = y.umap, colour = as.logical(.data[[currentColumn]]))
  }

  p <- ggplot(cd4_compass_subsets_w_umap, pointMapping) +
    geom_point_rast(shape = 20, alpha = 0.8, size = pointSize) +
    facet_grid(Cohort ~ STIM, switch = "y",
               labeller = labeller(Cohort = cohort_labs, STIM = stim_labs)) +
    theme_bw() +
    theme(plot.title = element_text(hjust = 0.5, size = 22, face = "bold"),
          axis.text = element_blank(),
          axis.line = element_blank(),
          axis.ticks = element_blank(),
          axis.title = element_blank(),
          strip.text = element_text(face = "bold", size = 22),
          panel.grid.major = element_blank(),
          legend.title = element_text(face = "bold", size = 14),
          strip.text.x = element_text(margin = margin(0.15, 0, 0.15, 0, "cm")))

  if (isCategorical) {
    # `[[` always returns the column as a vector, so the count is right for a
    # tibble as well as a data.frame (`[, col]` on a tibble stays a tibble and
    # length() would then report 1).
    nColours <- length(unique(cd4_compass_subsets_w_umap[[currentColumn]]))
    p <- p + labs(color = currentColumn) +
      # Enlarge the legend keys; the plotted points are far too small to read
      guides(colour = guide_legend(override.aes = list(size = 10))) +
      theme(legend.title = element_text(size = 15),
            legend.text = element_text(size = 15),
            legend.position = "bottom") +
      scale_colour_manual(values = as.character(iwanthue(nColours)))
  } else {
    # Only one colour scale is ever added, so ggplot2 never has to replace a
    # previously registered one.
    if (!anyNA(colorScheme)) {
      p <- p + scale_color_manual(values = colorScheme)
    }
    p <- p + theme(legend.position = "none")
  }
  p
}

base_dimred_plot("Batch")
base_dimred_plot("SAMPLE ID")
Now visualize the cytokine, memory, and activation expression localization
# One UMAP per gate, coloured by gate membership. The title uses the last
# component of the gate path.
for (gate_path in cd4_gates_for_dimred) {
  gate_label <- sub(".*\\/([^(\\/)]+)", "\\1", gate_path)
  gate_plot <- base_dimred_plot(gate_path, colorScheme = boolColorScheme) +
    labs(title = sprintf("CD4+ COMPASS Subset+ UMAP\nColored by %s", gate_label))
  # Explicit print() is required for plots created inside a loop
  print(gate_plot)
}